﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_aux_ind_top_sum.sas 5 &
*/

/* Project data directory; the crosswalk import and the final export build their paths from it. */
%let dir_out = /projects/hsieh_project/proj_201809/data/;

/* Library references: shared LBD data commons and this project's data folder. */
libname hr  "/projects/hsieh_project/proj_201809/data/";
libname lbd "/projects/data_commons/lbd/";


/*
================================================================================
Load Raw Data
*/

/* Load the reader macro (SOURCE2 echoes the included code to the log) and run it
   for the listed years.
   NOTE(review): m_read presumably creates WORK.LBD, which the steps below read -
   confirm in m_read.sas. */
%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;
%m_read(
  param_dev=1,
  param_lyear=%quote(1977,2001,2013,2014,2016),
  param_czone=1,
  param_drop=1
);

/*
--------------------------------------------------------------------------------
Define top firms by number of establishments (est)
*/

/* Percentile helper macro; SOURCE2 echoes the included code to the log. */
%include "/projects/hsieh_project/code_0_general/m_perc_by_var.sas" /source2;

/* Settings for the counting step:
   ivar   - identifier of the unit being counted
   ivaro  - name of the counting variable created below (set to 1 per kept record)
   vfirst - BY variable whose first occurrence is kept */
%let vfirst=lbdid;
%let ivaro=n_est;
%let ivar=lbdid;

/* Sorted working copy of LBD; LBD itself is not modified by this step. */
proc sort data=lbd out=dt_var;
  by year ch_ind firmnum &ivar.;
run;

/* Keep one record per year x ch_ind x firmnum x &vfirst. and give it a count of 1. */
data dt_var;
  set dt_var;
  by year ch_ind firmnum &ivar.;
  if not first.&vfirst. then delete;
  &ivaro. = 1;
run;

%put ----------------------------------------;
%put Define Top Firms;

/* Rank firms (var_unit=firmnum) within year x ch_ind on &ivaro.; results go to DT_PERC.
   NOTE(review): the later steps use a variable named mkt_perc, presumably built by the
   helper from var_out=mkt - confirm in m_perc_by_var.sas. */

proc sort data=dt_var;
  by year ch_ind firmnum &ivar.;
run;

%m_perc_by_var(ds_in=dt_var, ds_out=dt_perc, var_in=&ivaro., var_by=%bquote(year ch_ind), var_unit=firmnum, var_out=mkt, var_runif=runif);

/* Attach the firm-level percentile variables to every LBD record. */
proc sort data=dt_perc;
  by year ch_ind firmnum;
run;

proc sort data=lbd;
  by year ch_ind firmnum;
run;

data lbd;
  merge lbd dt_perc;
  by year ch_ind firmnum;
run;

/* Snapshot of the merged data; m_aux_agg rebuilds LBD from this copy on every call. */
data lbd_init;
  set lbd;
run;

/* 
--------------------------------------------------------------------------------
Import list of fk_industries 
*/

/* Read the Stata crosswalk of fk_naics codes into WORK.DT_CW.
   DBMS=DTA states the file type explicitly instead of relying on the file extension.
   REPLACE lets the step overwrite an existing DT_CW, so re-running the program in the
   same session refreshes the table instead of cancelling the import (this also matches
   the REPLACE used by the export at the end of the program). */
proc import out=dt_cw
  datafile="&dir_out./cw_ind_fk_aux_v12.dta"
  dbms=dta
  replace;
run;

/*
================================================================================
Calculate Auxiliary Employment
*/

%macro m_aux_agg(aux_def= , iperc= ) / minoperator;

/*
--------------------------------------------------------------------------------
m_aux_agg

For one definition of "auxiliary" and one percentile cutoff, sums auxiliary
employment, establishments and payroll of the selected firms by industry and
appends the result to WORK.DT_OUT.

Parameters:
aux_def: definition of auxiliary industry (1, 2, 3, 4, 54 or 55; see below)
iperc:   cutoff in percent; a firm is selected when one of its auxiliary
         records has mkt_perc <= iperc / 100

Reads (WORK):   lbd_init, dt_cw
Writes (WORK):  dt_out (appended, created on first call); the tables lbd, dti_cw,
                dt_firmnum, dt_firm_sel, dt_sum1, dt_sum2, dt_sum, dt_sum_aux
                and dti_out are overwritten on every call. lbd_init is re-sorted
                by fk_naics in place.

--------------------------------------------------------------------------------
Define auxiliary industries

Definition of auxiliary industry (aux_def):
1: FK 54 and 55 (assign by share)
2: Any ch_ind with FK 54 and 55 (assign by share)
3: FK 54 and 55 (w/ double counting)
4: Any ch_ind with FK 54 and 55 (w/ double counting)
54: Any ch_ind with FK 54 (assign by share)
55: Any ch_ind with FK 55 (assign by share)
Note further that any industry with 54 or 55 is not considered a non-aux industry
(and thus does not appear in the figures)

How auxiliary employment associated with a non-auxiliary industry is calculated:
1,2,54,55: = total auxiliary employment * employment share of the non-auxiliary
             industry (out of total non-auxiliary employment)
3,4:       = total auxiliary employment

Some variables:
i_aux_ind = 1 ~ the industry has 54 or 55 (set for every fk_naics in the crosswalk)
i_aux_fk = 1 ~ the fk_naics is 54 or 55
i_aux = 1 ~ the establishment is auxiliary by current definition
*/

/* Keep the weight expression private so a global macro variable with the same
   name is never overwritten. */
%local m_weight;

/* Fail fast on bad arguments. Without this check an unknown aux_def would skip
   every branch below, leaving i_aux uninitialised and m_weight unresolved. */
%if %length(&aux_def.) = 0 or %length(&iperc.) = 0 %then %do;
  %put ERROR: m_aux_agg requires non-empty aux_def and iperc.;
  %return;
%end;

%if &aux_def. in (1 2 3 4 54 55) %then %do;
  %put ----------------------------------------;
  %put Definition &aux_def.;
%end;
%else %do;
  %put ERROR: m_aux_agg does not support aux_def=&aux_def.. Expected 1, 2, 3, 4, 54 or 55.;
  %return;
%end;

/* Crosswalk flags: every listed fk_naics belongs to an auxiliary industry;
   i_aux_fk additionally marks the codes whose 2-digit FK sector is 54 or 55. */
data dti_cw;
  set dt_cw;
  i_aux_ind = 1;
  if fk_naics2 in ("54","55") then i_aux_fk = 1;
  keep fk_naics i_aux_ind i_aux_fk fk_naics2;
run;

proc sort data=lbd_init; by fk_naics; run;
proc sort data=dti_cw; by fk_naics; run;

/* Rebuild LBD from the snapshot, attach the crosswalk flags and set i_aux for the
   requested definition in the same pass (i_aux stays missing when the rule does
   not apply). */
data lbd;
  merge lbd_init(in=in_x) dti_cw;
  by fk_naics;
  if in_x;
  %if &aux_def. in (1 3) %then %do;
    i_aux = i_aux_fk;
  %end;
  %else %if &aux_def. in (2 4) %then %do;
    i_aux = i_aux_ind;
  %end;
  %else %if &aux_def. = 54 %then %do;
    /* NOTE(review): ch_ind 822 is presumably the industry holding FK 55, so every
       other crosswalk industry counts as FK 54 - confirm against the crosswalk. */
    if i_aux_ind = 1 and ch_ind ne 822 then i_aux = 1;
  %end;
  %else %if &aux_def. = 55 %then %do;
    if i_aux_ind = 1 and ch_ind = 822 then i_aux = 1;
  %end;
run;

/*
--------------------------------------------------------------------------------
Select firms with auxiliary establishment
1) to calculate auxiliary employment by firm
2) to assign auxiliary employment to non-auxiliary industry
*/

/* SAS orders missing values below every number, so a plain "<=" test would also
   select records whose mkt_perc is missing; exclude those explicitly. */
data dt_firmnum;
  set lbd;
  if i_aux = 1;
  if not missing(mkt_perc) and mkt_perc <= %sysevalf(&iperc. / 100);
run;

proc sort data=dt_firmnum; by year firmnum; run;

/* One row per selected firm-year. */
data dt_firmnum;
  set dt_firmnum;
  by year firmnum;
  if first.firmnum;
  i_aux_firm = 1;
  keep year firmnum i_aux_firm;
run;

/* dt_firmnum is still ordered by year firmnum (the step above only drops rows),
   so only LBD needs sorting before the merge. */
proc sort data=lbd; by year firmnum; run;

/* All records (auxiliary or not) of the selected firms. */
data dt_firm_sel;
  merge lbd dt_firmnum(in=in_y);
  by year firmnum;
  if in_y;
run;

/*
--------------------------------------------------------------------------------
Assign aux emp to non-aux ind at firm level

*_find = variable at the firm-industry level
*_f = variable at the firm level (within each i_aux_ind x i_aux group)
*/

proc sort data=dt_firm_sel; by year firmnum i_aux_ind i_aux ch_ind; run;

proc means data=dt_firm_sel noprint;
  by year firmnum i_aux_ind i_aux ch_ind;
  output out=dt_sum1(drop=_type_ _freq_) sum(worker est salary) = emp_find est_find pay_find;
run;

proc means data=dt_firm_sel noprint;
  by year firmnum i_aux_ind i_aux;
  output out=dt_sum2(drop=_type_ _freq_) sum(worker est salary) = emp_f est_f pay_f;
run;

/* Firm-industry totals next to the total of their firm x i_aux_ind x i_aux group. */
data dt_sum;
  merge dt_sum1 dt_sum2;
  by year firmnum i_aux_ind i_aux;
run;

/* Firm-level auxiliary totals. The RENAME statement is applied before the KEEP=
   option on the output data set, so KEEP= lists the new names. */
data dt_sum_aux (keep=year firmnum emp_f_aux est_f_aux pay_f_aux);
  set dt_sum2;
  if i_aux_ind = 1 & i_aux = 1;
  rename emp_f=emp_f_aux est_f=est_f_aux pay_f=pay_f_aux;
run;

data dt_sum;
  merge dt_sum dt_sum_aux;
  by year firmnum;
run;

/*
----------------------------------------
Specify how aux emp is assigned
*/

%if &aux_def. in (1 2 54 55) %then %do;
  %let m_weight = emp_find / emp_f; /* Assign based on emp share */
%end;
%else %do;
  %let m_weight = 1; /* Definitions 3 and 4: assign allowing for double counting */
%end;

/* The same weight (an employment share, or 1) is applied to employment,
   establishments and payroll. */
data dt_sum;
  set dt_sum;
  emp_find_aux = &m_weight. * emp_f_aux;
  est_find_aux = &m_weight. * est_f_aux;
  pay_find_aux = &m_weight. * pay_f_aux;
run;

/*
--------------------------------------------------------------------------------
Aggregate to Industry Level

*_ind_aux: associated auxiliary variable at the industry level of the top firms. (e.g. aux emp at the ind level)
aux_def: Definition of auxiliary
mkt_est_perc: Percentile cutoff (iperc) by number of establishments
*/

proc sort data=dt_sum; by year i_aux_ind ch_ind; run;

proc means data=dt_sum noprint;
  by year i_aux_ind ch_ind;
  output out=dti_out(drop=_type_ _freq_) sum(emp_find_aux est_find_aux pay_find_aux) = emp_ind_aux est_ind_aux pay_ind_aux;
run;

/* Tag the rows with the run parameters so stacked results stay distinguishable. */
data dti_out;
  set dti_out;
  aux_def = &aux_def.;
  mkt_est_perc = &iperc.;
run;

/*
----------------------------------------
Append Industry Sum
*/

%if %sysfunc(exist(dt_out)) %then %do;
  data dt_out;
    set dt_out dti_out;
  run;
%end;
%else %do;
  data dt_out;
    set dti_out;
  run;
%end;

%mend m_aux_agg;

/*
================================================================================
Run the main macro
*/

/* Driver: calls m_aux_agg for every definition in list_def within each cutoff in
   list_perc. With the defaults this is cutoff 10 for definitions 1, 2, 3, 4, 54, 55,
   followed by cutoff 1 for the same six definitions. */
%macro m_aux_run_all(list_def=1 2 3 4 54 55, list_perc=10 1);
  %local i_perc i_def n_perc n_def;
  %let n_perc = %sysfunc(countw(&list_perc., %str( )));
  %let n_def = %sysfunc(countw(&list_def., %str( )));
  %do i_perc = 1 %to &n_perc.;
    %do i_def = 1 %to &n_def.;
      %m_aux_agg(aux_def=%scan(&list_def., &i_def., %str( )),iperc=%scan(&list_perc., &i_perc., %str( )));
    %end;
  %end;
%mend m_aux_run_all;

%m_aux_run_all;

/*
--------------------------------------------------------------------------------
Export
*/

/* Write the stacked industry-level results to a Stata file in the project data
   directory, overwriting any earlier export. */
proc export
  data=dt_out
  outfile="&dir_out./aux_ind_top_sum.dta"
  replace;
run;

/* End of SAS file */
